library(car)
## Loading required package: carData
library(ggplot2)
library(ggthemes)
library(mgcv)
## Loading required package: nlme
## This is mgcv 1.8-33. For overview type 'help("mgcv-package")'.
library(plot3D)
library(stringr)
require(MASS)
## Loading required package: MASS
Fonction
#Data frame prior
data.frame.prior = function(seq_ne, max_simu){
#Créer matrice vide qui contiendra les stats
mat_prior = matrix(data = NA, nrow = length(seq_ne)*max_simu, ncol = 4)
#Attribution nom colonnes selon appelation stat par MetHis
colnames(mat_prior) = c("Ne", "simu", "s1", "s2")
mat_prior = as.data.frame(mat_prior)
#Compteur pour indiquer les lignes à remplir
cpt = 0
for (ne in seq_ne) {
dir_ne = str_c("Ne", toString(ne), "/") #Répertoire de taille effective
row_min = cpt*max_simu+1
row_max = (cpt+1)*max_simu
mat_prior[row_min:row_max,1] = toString(ne)
cpt = cpt+1
for (nb_simu in 1:max_simu) {
mat_prior[row_min+nb_simu-1,2] = nb_simu
dir_simu = str_c("simu_", toString(nb_simu), "/")
file_path = str_c(dir_ne, dir_simu, "simu_", nb_simu, ".txt")
df_tmp = read.table(file_path, header = TRUE)
mat_prior[row_min+nb_simu-1,3:4] = df_tmp[1,5:6]
}
}
#Passage des Ne en facteur
mat_prior$Ne = factor(as.factor(mat_prior$Ne), levels = as.character(seq_ne))
#Passage simuulations en entier
mat_prior$simu = as.integer(mat_prior$simu)
return(mat_prior)
}
Fonction de lecture des stats résumées. La fonction est prévue pour lire les fichiers de stats résumées dans des répertoires de la forme “Ne100/simu_1” (en modifiant les valeurs de Ne et de simulations). Elle renvoie les résultats sous forme de data frame en ajoutant aux statistiques résumées les colonnes correspondant à l’effectif efficace et la simulation.
#Data frame construction
data.frame.stat = function(seq_ne, max_gen, max_simu){
#Créer matrice vide qui contiendra les stats
mat_stat = matrix(data = NA, nrow = length(seq_ne)*max_gen*max_simu, ncol = 63)
mat_stat = as.data.frame(mat_stat)
#Compteur pour indiquer les lignes à remplir
cpt = 0
for (ne in seq_ne) {
if (dir.exists(str_c("Ne", toString(ne), "/"))) {
dir_ne = str_c("Ne", toString(ne), "/") #Répertoire de taille effective
row_min = cpt*max_gen*max_simu+1 #Ligne min. où on écrit les stats pour le Ne donné
row_max = (cpt+1)*max_gen*max_simu #Ligne max. où on écrit les stats
mat_stat[row_min:row_max,1] = toString(ne) #Ecriture du Ne correspondant pour les stats qui seront écrites
cpt = cpt+1 #Incrémentation du compteur
for (nb_simu in 1:max_simu) {
dir_simu = str_c("simu_", toString(nb_simu), "/")
row_min_simu = (nb_simu-1)*max_gen + row_min
row_max_simu = nb_simu*max_gen + row_min -1
mat_stat[row_min_simu:row_max_simu,2] = nb_simu
file_path = str_c(dir_ne, dir_simu, "final_sumstats.txt")
file_stat = read.table(file_path, header = TRUE) #Lecture fichier stat résumées
#Suppression 1ère colonne, contenant uniquement chiffres pour tri en bash.
mat_stat[row_min_simu:row_max_simu,3:63] = file_stat
}
}
}
#Attribution nom colonnes selon appelation stat par MetHis
colnames(mat_stat) = c("Ne", "simu", names(file_stat))
#Passage des Ne en facteur
mat_stat$Ne = factor(as.factor(mat_stat$Ne), levels = as.character(seq_ne))
#Passage simulations en entier
mat_stat$simu = as.integer(mat_stat$simu)
#Passage générations en entier
mat_stat$Generation = as.integer(mat_stat$Generation)
#Passage stats en double
for (numcol in 4:ncol(mat_stat)) {
mat_stat[,numcol] = as.double(mat_stat[,numcol])
}
return(mat_stat)
}
Dans le cas où l’on travaille sur de très nombreuses simulations correspondant à un effectif efficace, cette fonction permet de calculer les statistiques moyennées pour les valeurs de Ne étudiées.
#Passage en moyenne
data.frame.mean = function(df_stat, max_gen, seq_ne){
lrow = length(seq_ne)
df_mean = matrix(data = NA, nrow = lrow*max_gen, ncol = 62 )
df_mean = as.data.frame(df_mean)
cpt = 0
for (ne in seq_ne) {
df_tmp = df_stat[which(df_stat$Ne == ne),]
row_min = cpt*max_gen+1
row_max = (cpt+1)*max_gen
df_mean[row_min:row_max, 1] = toString(ne)
cpt = cpt+1
for (gen in 0:(max_gen-1) ) {
df_mean[row_min+gen, 2] = gen
vec_tmp = apply(df_tmp[which(df_tmp$Generation == gen), 4:63], 2, mean)
df_mean[row_min+gen, 3:62] = vec_tmp
}
}
colnames(df_mean) = colnames(df_stat)[-2]
df_mean$Ne = factor(as.factor(df_mean$Ne), levels = seq_ne )
return(df_mean)
}
Fonction d’affichage des graphiques en 2D, avec un background blanc et les lignes horizontales de quadrillage affichées. Cette fonction permet d’afficher les lignes de régression ou les lignes reliant les points.
#Plot function
#Affichage d'une stat au cours des générations
plot_stat_gen = function(df, gen, stat, group_col, color_col, titre, ligne = FALSE, legd = TRUE,
size_point = 0.1, line_t = "solid"){
p = ggplot(df, aes(x = gen, y = stat,
group = group_col, color = color_col)) + ggtitle(titre)
if (ligne) {
p = p + geom_point(size = size_point, show.legend = legd)+
geom_line(aes(linetype = line_t), show.legend = legd)
}else{
#smooth de la courbe pour obtenir régression et tempérer variabilité due au TCL
p = p + geom_point(size = size_point, show.legend = legd)+ geom_smooth(show.legend = legd)
}
p = p + theme_classic()
p = p + theme(panel.grid.major.y = element_line(size = 0.5,
linetype = 'solid',
colour = "#BABABA"),
panel.grid.minor.y = element_line(size = 0.25,
linetype = 'solid',
colour = "#CECECE"))
#print(p)
return(p)
}
Cette fonction d’affichage de graphique reprend la précédente, mais sans affficher les points.
#Plot function
#Affichage d'une stat au cours des générations
plot_without_point = function(df, gen, stat, color_col, titre, legd = TRUE){
p = ggplot(df, aes(x = gen, y = stat, color = color_col)) + ggtitle(titre)
#smooth de la courbe pour obtenir régression et tempérer variabilité due au TCL
p = p + geom_smooth(show.legend = legd)
p = p + theme_classic()
p = p + theme(panel.grid.major.y = element_line(size = 0.5,
linetype = 'solid',
colour = "#BABABA"),
panel.grid.minor.y = element_line(size = 0.25,
linetype = 'solid',
colour = "#CECECE"))
#print(p)
return(p)
}
Fonction d’extraction d’une sous-matrice à partir d’une matrice principale en fonction de valeurs choisies^ de Ne.
extract_sub_mat = function(all_mat, list_ne){
all_rows = c()
for (ne in as.character(list_ne)) {
all_rows = append(all_rows, which(all_mat[,1] == ne))
}
return(all_mat[all_rows,])
}
Calcul des matrices de stats
seq_1024_16384 = 2**seq(10,14,1)
seq_50_1000 = seq(50,1000,1)
seq_tot = sort(c(seq_50_1000, seq_1024_16384))
mat_tot = data.frame.stat(seq_tot, 101, 1)
mat_50_1000 = extract_sub_mat(mat_tot, c(seq(50,200,10), seq(200,1000,100)))
mat_1024_16384 = extract_sub_mat(mat_tot, 2**seq(6,14,1))
#mat_1024_16384_mean = data.frame.mean(mat_1024_16384, 101, seq_1024_16384)
Graphiques obtenus : affichage de l’évolution d’une statistique donnée en fonction des générations colorée selon la Ne initiale.
p_tmp = plot_stat_gen(mat_tot, mat_tot$Generation,
mat_tot$mean.het.adm, mat_tot$Ne, mat_tot$Ne,
"Stat en fonction des générations\n selon différentes Ne initiales",
legd = FALSE)
p_tmp = p_tmp+geom_hline(yintercept = mat_50_1000$mean.het.s1[1], color = "red")
p_tmp = p_tmp+geom_hline(yintercept = mat_50_1000$mean.het.s2[1], color = "black")
print(p_tmp)
## `geom_smooth()` using method = 'loess' and formula 'y ~ x'
p_tmp = plot_stat_gen(mat_50_1000, mat_50_1000$Generation,
mat_50_1000$mean.het.adm, mat_50_1000$Ne, mat_50_1000$Ne,
"Stat en fonction des générations\nselon différentes Ne initiales",
legd = TRUE)
p_tmp = p_tmp+geom_hline(yintercept = mat_50_1000$mean.het.s1[1], color = "red")
p_tmp = p_tmp+geom_hline(yintercept = mat_50_1000$mean.het.s2[1], color = "black")
print(p_tmp)
## `geom_smooth()` using method = 'loess' and formula 'y ~ x'
plot_stat_gen(mat_1024_16384, mat_1024_16384$Generation,
mat_1024_16384$mean.het.adm, mat_1024_16384$Ne, mat_1024_16384$Ne,
"Stat en fonction des générations\n selon différentes Ne initiales")
## `geom_smooth()` using method = 'loess' and formula 'y ~ x'
plot_stat_gen(mat_tot, mat_tot$Generation,
mat_tot$Fst.s1.adm, mat_tot$Ne,mat_tot$Ne,
"Stat en fonction des générations\n selon différentes Ne initiales",
legd = FALSE)
## `geom_smooth()` using method = 'loess' and formula 'y ~ x'
plot_stat_gen(mat_50_1000, mat_50_1000$Generation,
mat_50_1000$Fst.s1.adm, mat_50_1000$Ne,mat_50_1000$Ne,
"Stat en fonction des générations\n selon différentes Ne initiales")
## `geom_smooth()` using method = 'loess' and formula 'y ~ x'
plot_stat_gen(mat_1024_16384, mat_1024_16384$Generation,
mat_1024_16384$Fst.s1.adm, mat_1024_16384$Ne,mat_1024_16384$Ne,
"Stat en fonction des générations\n selon différentes Ne initiales")
## `geom_smooth()` using method = 'loess' and formula 'y ~ x'
Réutilisation de la fonction de lecture de stats résumées pour des populations croissantes. Celles-ci sont enregistrées dans des répertoires de la forme “Ne100-XXX/Ne100-1000/simu_1” Fonction de lecture des stats résumées. La fonction est prévue pour lire les fichiers de stats résumées dans des répertoires de la forme “NeXXX/simu_XXX” (avec XXX remplacés par les valeurs de Ne et de simulations). Elle renvoie les résultats sous forme de data frame en ajoutant aux statistiques résumées les colonnes correspondant à l’effectif efficace et la simulation.
data.frame.increase = function(seq_ne_ini, seq_combi){
for (ne_ini in seq_ne_ini) {
if (dir.exists(str_c("Ne", ne_ini, "-XXX/"))) {
dir_ne = str_c("Ne", ne_ini, "-XXX/")
setwd(dir_ne)
motif_detect = str_c("^",ne_ini,"-")
vec_ne_tmp = seq_combi[which(str_detect(seq_combi, motif_detect))]
mat_tmp = data.frame.stat(seq_ne = vec_ne_tmp, max_gen = 101, max_simu = 1)
if (ne_ini == seq_ne_ini[1]) {
mat_pop_inc = mat_tmp
}else{
mat_pop_inc = rbind(mat_pop_inc, mat_tmp)
}
setwd("../")
}
}
return(na.omit(mat_pop_inc))
}
Lecture des data frame pour les combinaison de Ne entre ne0 (ne_ini) et nef (ne_fin). On extrait ensuite les stats pour lesquelles on a un nef de 10000.
setwd("../new_methis_pop_increase_50000_snp/")
seq_ne_ini = seq(50,100,2)
seq_ne_fin = seq(100, 10000, 100)
seq_combi = as.data.frame(outer(seq_ne_ini, seq_ne_fin, FUN="paste", sep="-"))
seq_combi = as.data.frame.vector(unlist(seq_combi))[[1]]
mat_pop_inc = data.frame.increase(seq_ne_ini, seq_combi)
mat_end_10000 = extract_sub_mat(mat_pop_inc,
as.data.frame(outer(seq(50,100,4),
c("10000"),
FUN="paste",
sep="-"))[,1])
plot_stat_gen(mat_pop_inc, mat_pop_inc$Generation,
mat_pop_inc$f3, mat_pop_inc$Ne,mat_pop_inc$Ne,
"Stat en fonction des générations\n selon différentes Ne initiales",
FALSE, legd = FALSE)
## `geom_smooth()` using method = 'loess' and formula 'y ~ x'
plot_stat_gen(mat_pop_inc, mat_pop_inc$Generation,
mat_pop_inc$mean.het.adm, mat_pop_inc$Ne,mat_pop_inc$Ne,
"Stat en fonction des générations\n selon différentes Ne initiales",
FALSE, legd = FALSE)
## `geom_smooth()` using method = 'loess' and formula 'y ~ x'
plot_stat_gen(mat_end_10000, mat_end_10000$Generation,
mat_end_10000$mean.het.adm, mat_end_10000$Ne,mat_end_10000$Ne,
"Moyenne hétérozygotie en fonction des générations\n selon différentes Ne initiales",
legd = TRUE)
## `geom_smooth()` using method = 'loess' and formula 'y ~ x'
plot_stat_gen(mat_end_10000, mat_end_10000$Generation,
mat_end_10000$f3, mat_end_10000$Ne,mat_end_10000$Ne,
"F3 en fonction des générations\n selon différentes Ne initiales",
legd = TRUE)
## `geom_smooth()` using method = 'loess' and formula 'y ~ x'
setwd("../new_methis_pop_increase_50000_snp_Nu_var/")
seq_nu = sprintf("%.2f", seq(0.05, 0.45, 0.05))
seq_ne_ini = seq(50,100,10)
seq_ne_fin = seq(100, 10000, 200)
seq_combi = as.data.frame(outer(seq_ne_ini, seq_ne_fin, FUN="paste", sep="-"))
seq_combi = as.data.frame.vector(unlist(seq_combi))[[1]]
list_df_nu = list()
cpt = 1
for (nu in seq_nu) {
if(dir.exists(str_c("Nu", nu ))){
dir_nu = str_c("Nu", nu, "/")
setwd(dir_nu)
list_df_nu[[cpt]] = data.frame.increase(seq_ne_ini, seq_combi)
cpt = cpt+1
setwd("../")
}
}
for (combi_lst in 1:length(list_df_nu)) {
if (combi_lst == 1) {
mat_pop_nu = list_df_nu[[combi_lst]]
mat_pop_nu = data.frame(mat_pop_nu, Nu = seq_nu[combi_lst])
}else{
mat_tmp = list_df_nu[[combi_lst]]
mat_tmp = data.frame(mat_tmp, Nu = seq_nu[combi_lst])
mat_pop_nu = rbind(mat_pop_nu, mat_tmp)
}
}
mat_pop_nu$Nu = as.factor(mat_pop_nu$Nu)
plot_without_point(mat_pop_nu, mat_pop_nu$Generation,
mat_pop_nu$mean.het.adm, mat_pop_nu$Ne,
"Stat en fonction des générations\n selon différentes Ne initiales\n et couleurs selon nu",
legd = FALSE)
## `geom_smooth()` using method = 'loess' and formula 'y ~ x'
plot_without_point(mat_pop_nu, mat_pop_nu$Generation,
mat_pop_nu$mean.het.adm, mat_pop_nu$Nu,
"Stat en fonction des générations\n selon différentes Ne initiales\n et couleurs selon nu",
legd = TRUE)
## `geom_smooth()` using method = 'gam' and formula 'y ~ s(x, bs = "cs")'
# plot_stat_gen(mat_pop_nu, mat_pop_nu$Generation,
# mat_pop_nu$mean.het.adm, mat_pop_nu$Ne,
# "Stat en fonction des générations\n selon différentes Ne initiales",
# TRUE, legd = FALSE)
# plot_stat_gen(mat_pop_nu, mat_pop_nu$Generation,
# mat_pop_nu$mean.het.adm, mat_pop_nu$Nu,
# "Stat en fonction des générations\n selon différentes Ne initiales",
# TRUE, legd = FALSE)
for (rg_list in 1:length(list_df_nu)) {
p_tmp = plot_stat_gen(list_df_nu[[rg_list]], list_df_nu[[rg_list]]$Generation,
list_df_nu[[rg_list]]$mean.het.adm, list_df_nu[[rg_list]]$Ne,
list_df_nu[[rg_list]]$Ne,
str_c("Nu = ",seq_nu[rg_list]), TRUE,legd = FALSE)
p_tmp = p_tmp+geom_hline(yintercept = list_df_nu[[rg_list]]$mean.het.s1[1],
color = "red")
p_tmp = p_tmp+geom_hline(yintercept = list_df_nu[[rg_list]]$mean.het.s2[1],
color = "black")
print(p_tmp)
}
for (rg_list in 1:length(list_df_nu)) {
list_ne = list_df_nu[[rg_list]]$Ne[which(list_df_nu[[rg_list]]$Generation == 100)]
list_ne = as.integer(unlist(str_split(list_ne, "-")))
list_ne0 = list_ne[seq(1, length(list_ne), 2)]
list_nef = list_ne[seq(2, length(list_ne), 2)]
mat_tmp = list_df_nu[[rg_list]][which(list_df_nu[[rg_list]]$Generation == 100),]
scatter3D(x = list_ne0, y = list_nef, z = mat_tmp$mean.het.adm,
bty = "b2", pch = 16, ticktype = "detailed", main = seq_nu[rg_list],
theta = 120, phi = 0, type = "s", cex.lab = 0.6, cex.axis = 0.5,
xlab = "Ne0", ylab = "Nef", zlab = "Stat")
}
grid.lines = 50
fit <- loess(mat_tmp$mean.het.adm ~ list_nef + list_ne0)
x.pred <- seq(min(list_ne0), max(list_ne0), length.out = grid.lines)
y.pred <- seq(min(list_nef), max(list_nef), length.out = grid.lines)
xy <- expand.grid( x = x.pred, y = y.pred)
z.pred <- matrix(predict(fit, newdata = xy),
nrow = grid.lines, ncol = grid.lines)
## Warning: 'newdata' had 2500 rows but variables found have 300 rows
fitpoints = predict(fit)
scatter3D(x = list_ne0, y = list_nef, z = mat_tmp$mean.het.adm,
bty = "b2", pch = 16, ticktype = "detailed", main = seq_nu[rg_list],
theta = 150, phi = 0, type = "s",cex = 0.5, cex.lab = 0.6, cex.axis = 0.5,
xlab = "Ne0", ylab = "Nef", zlab = "Stat",
surf = list(x = x.pred, y = y.pred, z = z.pred,
facets = NA))
Calcul matrice pour connaître Ne à chaque génération.
setwd("../new_methis_pop_increase_50000_snp_Nu_var/")
mat_ne_inc = data.frame()
cpt = 1
for (nu in seq_nu) {
if(dir.exists(str_c("Nu", nu ))){
dir_nu = str_c("Nu", nu, "/")
setwd(dir_nu)
for (ne_ini in seq_ne_ini) {
if (dir.exists(str_c("Ne", ne_ini, "-XXX/"))) {
dir_ne = str_c("Ne", ne_ini, "-XXX/")
setwd(dir_ne)
seq_combi = as.data.frame(outer(ne_ini, seq_ne_fin,
FUN="paste", sep="-"))
seq_combi = as.data.frame.vector(unlist(seq_combi))[[1]]
for (combi in seq_combi) {
if (dir.exists(str_c("Ne", combi))) {
dir_ne_bis = str_c("Ne", combi, "/simu_1/")
setwd(dir_ne_bis)
cpt = cpt + 1
file_tmp = read.table("simu_1.par", header = TRUE)
if (nu == seq_nu[1] && ne_ini==seq_ne_ini[1] && combi == seq_combi[1]) {
mat_ne_inc = as.data.frame(file_tmp[,2])
}else{
mat_ne_inc = cbind(mat_ne_inc, file_tmp[,2])
}
setwd("../../")
}
}
setwd("../")
}
}
setwd("../")
}
}
mat_pop_nu = data.frame(mat_pop_nu,
ne_gen = unlist(mat_ne_inc , use.names = F),
n0 = rep(NA, nrow(mat_pop_nu)),
nf = rep(NA, nrow(mat_pop_nu)) )
for (i in 1:nrow(mat_pop_nu)) {
vec_tmp = as.integer(unlist(str_split(mat_pop_nu$Ne[i], "-")))
mat_pop_nu$n0[i] = vec_tmp[1]
mat_pop_nu$nf[i] = vec_tmp[2]
}
for (rg_nu in seq_nu) {
rg_nu
df_tmp = mat_pop_nu[which(mat_pop_nu$Nu == rg_nu),]
grid.lines = 30
fit <- loess(df_tmp$mean.het.adm ~ df_tmp$Generation + as.numeric(df_tmp$ne_gen))
x.pred <- seq(min(df_tmp$Generation), max(df_tmp$Generation), length.out = grid.lines)
y.pred <- seq(min(as.numeric(df_tmp$ne_gen)),
max(as.numeric(df_tmp$ne_gen)), length.out = grid.lines)
xy <- expand.grid( x = x.pred, y = y.pred)
z.pred <- matrix(predict(fit, newdata = xy),
nrow = grid.lines, ncol = grid.lines)
scatter3D(x = df_tmp$Generation, y = df_tmp$ne_gen, z = df_tmp$mean.het.adm,
bty = "b2", pch = 16, ticktype = "detailed", main = seq_nu[rg_nu],
theta = 210, phi = 20, type = "s",cex = 0.2, cex.lab = 0.6, cex.axis = 0.5,
xlab = "Gen", ylab = "Ne", zlab = "Stat",
surf = list(x = x.pred, y = y.pred, z = z.pred,facets = NA))
}
## Warning: 'newdata' had 900 rows but variables found have 30300 rows
## Warning: 'newdata' had 900 rows but variables found have 30300 rows
## Warning: 'newdata' had 900 rows but variables found have 30300 rows
## Warning: 'newdata' had 900 rows but variables found have 30300 rows
## Warning: 'newdata' had 900 rows but variables found have 30300 rows
## Warning: 'newdata' had 900 rows but variables found have 30300 rows
## Warning: 'newdata' had 900 rows but variables found have 30300 rows
## Warning: 'newdata' had 900 rows but variables found have 30300 rows
## Warning: 'newdata' had 900 rows but variables found have 30300 rows
seq_extract = c("0.05", "0.25", "0.45")
df_extract = mat_pop_nu[which(mat_pop_nu$Nu == seq_extract),]
scatter3D(x = mat_pop_nu$Generation, y = mat_pop_nu$ne_gen,
z = as.numeric(as.character(mat_pop_nu$Nu)),
bty = "b2", pch = 16, ticktype = "detailed",
theta = 210, phi = 50, type = "s", cex.lab = 0.6, cex.axis = 0.5,
xlab = "Gen", ylab = "Ne", zlab = "Nu")
setwd("../new_methis_pop_increase_50000_snp_Nu_var/")
seq_nu_bis = sprintf("%.2f", c(0.01, 0.25, 0.49))
seq_ne_ini = c(100)
seq_ne_fin = seq(1000, 10000, 1000)
seq_combi = as.data.frame(outer(seq_ne_ini, seq_ne_fin, FUN="paste", sep="-"))
seq_combi = as.data.frame.vector(unlist(seq_combi))[[1]]
df_nu_reduce = data.frame()
cpt = 1
for (nu in seq_nu_bis) {
if(dir.exists(str_c("Nu", nu ))){
dir_nu = str_c("Nu", nu, "/")
setwd(dir_nu)
df_tmp = data.frame.increase(seq_ne_ini, seq_combi)
col_nu = as.integer(rep(nu, nrow(df_tmp)))
col_cpt = rep(cpt, nrow(df_tmp))
col_n0 = rep(NA, nrow(df_tmp))
col_nf = rep(NA, nrow(df_tmp))
cpt = cpt + 1
if (nu == seq_nu_bis[1]) {
df_nu_reduce = data.frame(df_tmp, nu = col_nu, cpt = col_cpt,
n0 = col_n0, nf = col_nf)
}else{
df_tmp = data.frame(df_tmp, nu = col_nu, cpt = col_cpt,
n0 = col_n0, nf = col_nf)
df_nu_reduce = rbind(df_nu_reduce, df_tmp)
}
setwd("../")
}
}
for (i in 1:nrow(df_nu_reduce)) {
vec_tmp = as.integer(unlist(str_split(df_nu_reduce$Ne[i], "-")))
df_nu_reduce$n0[i] = vec_tmp[1]
df_nu_reduce$nf[i] = vec_tmp[2]
}
ne_gen = data.frame()
cpt = 1
for (nu in seq_nu_bis) {
if(dir.exists(str_c("Nu", nu ))){
dir_nu = str_c("Nu", nu, "/")
setwd(dir_nu)
for (ne_ini in seq_ne_ini) {
if (dir.exists(str_c("Ne", ne_ini, "-XXX/"))) {
dir_ne = str_c("Ne", ne_ini, "-XXX/")
setwd(dir_ne)
seq_combi = as.data.frame(outer(ne_ini, seq_ne_fin,
FUN="paste", sep="-"))
seq_combi = as.data.frame.vector(unlist(seq_combi))[[1]]
for (combi in seq_combi) {
if (dir.exists(str_c("Ne", combi))) {
dir_ne_bis = str_c("Ne", combi, "/simu_1/")
setwd(dir_ne_bis)
cpt = cpt + 1
file_tmp = read.table("simu_1.par", header = TRUE)
if (nu == seq_nu_bis[1] && ne_ini==seq_ne_ini[1] && combi == seq_combi[1]) {
ne_gen = as.data.frame(file_tmp[,2])
}else{
ne_gen = rbind(ne_gen, file_tmp[,2])
}
setwd("../../")
}
}
setwd("../")
}
}
setwd("../")
}
}
df_nu_reduce_gen_100 = df_nu_reduce[which(df_nu_reduce$Generation == 100),]
scatter3D(x = df_nu_reduce$Generation, y = df_nu_reduce$nf,
z = df_nu_reduce$mean.het.adm,
ticktype = "detailed",bty = "b2",pch = 16,
theta = 210, phi = 20, type = "s", cex.lab = 0.6, cex.axis = 0.5,
xlab = "generation", ylab = "Nf", zlab = "stat")
# scatter3D(x = df_tmp$Generation, y = vec_tot, z = df_tmp$mean.het.adm,
# bty = "b2", pch = 16, ticktype = "detailed", main = seq_nu[rg_nu],
# theta = 210, phi = 20, type = "s", cex.lab = 0.6, cex.axis = 0.5,
# xlab = "Gen", ylab = "Ne", zlab = "Stat")
setwd("../new_methis_bottleneck_50000_snp/")
seq_alpha = c(0.1, 0.5, 0.9)
seq_nu_red = c(0.01, 0.25, 0.49)
seq_bottle = seq(10, 90, 10)
seq_ne0 = c(1000)
seq_nef = c(1000)
seq_combi = as.data.frame(outer(seq_ne0, seq_nef, FUN="paste", sep="-"))
seq_combi = as.data.frame.vector(unlist(seq_combi))[[1]]
cpt = 1
for (alpha in seq_alpha) {
for (nu in seq_nu_red) {
for (bottle in seq_bottle) {
change_dir = str_c("alpha", alpha, "/Nu", nu, "/bottle",bottle,"/")
setwd(change_dir)
if (alpha==seq_alpha[1] & nu==seq_nu_red[1] & bottle==seq_bottle[1]) {
mat_bottle = data.frame.increase(seq_ne0, seq_combi)
col_alpha = rep(alpha, nrow(mat_bottle))
col_nu = rep(nu, nrow(mat_bottle))
col_bottle = rep(bottle, nrow(mat_bottle))
col_cpt = rep(cpt, nrow(mat_bottle))
mat_bottle = data.frame(mat_bottle, alpha = col_alpha,
U = col_nu, time_botl = col_bottle, cpt = col_cpt)
}else{
mat_tmp = data.frame.increase(seq_ne0, seq_combi)
col_alpha = rep(alpha, nrow(mat_tmp))
col_nu = rep(nu, nrow(mat_tmp))
col_bottle = rep(bottle, nrow(mat_tmp))
col_cpt = rep(cpt, nrow(mat_tmp))
mat_tmp = data.frame(mat_tmp, alpha = col_alpha,
U = col_nu, time_botl = col_bottle, cpt = col_cpt)
mat_bottle = rbind(mat_bottle, mat_tmp)
}
cpt = cpt + 1
setwd("../../../")
}
}
}
mat_bottle$alpha = as.factor(mat_bottle$alpha)
mat_bottle$U = as.factor(mat_bottle$U)
mat_bottle$time_botl = as.factor(mat_bottle$time_botl)
mat_bottle$cpt = as.factor(mat_bottle$cpt)
mat_bottle_time_50 = mat_bottle[which(mat_bottle$time_botl == 50),]
mat_bottle_time_20 = mat_bottle[which(mat_bottle$time_botl == 20),]
mat_bottle_time_80 = mat_bottle[which(mat_bottle$time_botl == 80),]
plot_without_point(mat_bottle, mat_bottle$Generation,
mat_bottle$mean.het.adm, mat_bottle$alpha,
"Stat en fonction des générations\n selon différents bottleneck",
legd = TRUE)
## `geom_smooth()` using method = 'gam' and formula 'y ~ s(x, bs = "cs")'
p = plot_stat_gen(mat_bottle_time_20, mat_bottle_time_20$Generation,
mat_bottle_time_20$mean.het.adm, mat_bottle_time_20$alpha,
mat_bottle_time_20$alpha,
"Stat en fonction des générations\n selon différents bottleneck",FALSE,
legd = TRUE)
p = p+ geom_vline(xintercept = 1, size = 0.4, color = "orange")+
geom_vline(xintercept = 21, size = 0.4, color = "orange")
print(p)
## `geom_smooth()` using method = 'loess' and formula 'y ~ x'
p = plot_stat_gen(mat_bottle_time_50, mat_bottle_time_50$Generation,
mat_bottle_time_50$mean.het.adm, mat_bottle_time_50$alpha,
mat_bottle_time_50$alpha,
"Stat en fonction des générations\n selon différents bottleneck",FALSE,
legd = TRUE)
p = p+ geom_vline(xintercept = 1, size = 0.4, color = "orange")+
geom_vline(xintercept = 51, size = 0.4, color = "orange")
print(p)
## `geom_smooth()` using method = 'loess' and formula 'y ~ x'
p = plot_stat_gen(mat_bottle_time_80, mat_bottle_time_80$Generation,
mat_bottle_time_80$mean.het.adm, mat_bottle_time_80$alpha,
mat_bottle_time_80$alpha,
"Stat en fonction des générations\n selon différents bottleneck",FALSE,
legd = TRUE)
p = p+ geom_vline(xintercept = 1, size = 0.4, color = "orange")+
geom_vline(xintercept = 81, size = 0.4, color = "orange")
print(p)
## `geom_smooth()` using method = 'loess' and formula 'y ~ x'
p = plot_stat_gen(mat_bottle_time_80, mat_bottle_time_80$Generation,
mat_bottle_time_80$mean.het.adm, mat_bottle_time_80$cpt,
mat_bottle_time_80$alpha, size_point = 0,
"Stat en fonction des générations\nbottleneck 80 N0 1000 Nf 1000",TRUE,
legd = TRUE, line_t = mat_bottle_time_80$U)
p = p+ geom_vline(xintercept = 1, size = 0.4, color = "orange")+
geom_vline(xintercept = 81, size = 0.4, color = "orange")+
labs(color = "alpha", linetype = "U")
print(p)
vec_tmp = c()
vec_tmp[which(mat_bottle$alpha == 0.1)] = 2
vec_tmp[which(mat_bottle$alpha == 0.5)] = 3
vec_tmp[which(mat_bottle$alpha == 0.9)] = 4
scatter3D(x = mat_bottle$Generation, y = as.numeric(as.character(mat_bottle$time_botl)),
z = mat_bottle$mean.het.adm, col = vec_tmp,
ticktype = "detailed",bty = "b2",pch = 16,
theta = 360, phi = 0, type = "l",cex = 0.2, cex.lab = 0.6, cex.axis = 0.5,
xlab = "generation", ylab = "bottle", zlab = "stat")